""" this routine will generate closest-member histograms statistics,
    save them to a netCDF file, and make diagnostic plots
"""

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from numpy import ma
import os, sys
from netCDF4 import Dataset
import pygrib
from dateutils import daterange, dateshift
from matplotlib import rcParams
import scipy.signal as signal
import scipy.stats as stats
from scipy.stats import gamma
from matplotlib.backends.backend_pdf import PdfPages

rcParams['legend.fontsize']='medium'
rcParams['legend.fancybox']=True
rcParams['xtick.labelsize']='medium'

data_directory = '/Users/thamill/precip/ecmwf_data/' 

# --- COMMAND LINE INPUT

cmodel = sys.argv[1] # NCEP, ECMWF, or CMC
cleade = sys.argv[2] # ending of periods lead time in hours, e.g., '24', '108'
ileade = int(cleade)
date_forecast = sys.argv[3] # yyyymmddhh format, date of initial condition

# --- define the previous 60 days that should have available data

date_end = dateshift(date_forecast,-ileade)
date_begin = dateshift(date_end, -61*24)
date_list = daterange(date_begin, date_end, 24) # makes a character list of dates 
ndates = len(date_list)

# ======================================================================
# Part 1:  reading in the daily data and summing up the information over
#          all case days
# ======================================================================

# --- read a sample file to set array dimensions.   These were 
#     previously generated by the fortran program 
#     generate_dressing_stats_anymodel_gammacdf.f90

infile = data_directory+'closest_hist_stats_' + \
    cmodel + '_2016050100_fhour48.nc'
print infile
nc = Dataset(infile)    
closest_histogram = nc.variables['closest_histogram'][:]
npv, nmembersx25 = np.shape(closest_histogram)
thresh_light = nc.variables['thresh_light'][0]
thresh_mod = nc.variables['thresh_mod'][0]
thresh_high = nc.variables['thresh_high'][0]
nhist = len(closest_histogram)
nc.close()

# ---- declare the arrays that will hold the summary information
#      tallied over many days.  

closest_histogram_total = np.zeros((3,nmembersx25),dtype=np.float32)

# ---- loop over dates, read in data, and add the latest day's data
#      to the overall summation arrays

for idate, date in zip(range(ndates), date_list):
    infile = data_directory+'closest_hist_stats_' + \
        cmodel + '_' + date + '_fhour'+cleade+'.nc'        
    print infile
    fexist  = os.path.exists(infile)
    if fexist:
        try: 
            
            #  --- read in 
            nc = Dataset(infile)               
            closest_histogram = nc.variables['closest_histogram'][:,:]

            # ---- add to sum over many days
            
            closest_histogram_total = closest_histogram_total + \
                closest_histogram  
            nc.close()   
                
        except (IOError, ValueError, RuntimeError):
            print 'Error reading ', infile
    
# ---- now we turn our attention to generating the closest histogram statistics
#      for the sorted ensemble.  Normalize the closest histograms so they sum 
#      to 1.0.   Then Savitzky-Golay smooth the interior values. 

for icat in range(3):
    closest_histogram_total[icat,:] = closest_histogram_total[icat,:] / \
        np.sum(closest_histogram_total[icat,:])

for i in range(180):
    print i,closest_histogram_total[0,i], closest_histogram_total[1,i],\
        closest_histogram_total[2,i]
        
closest_histogram_savgol = np.copy(closest_histogram_total)
for i in range(3):
    csavgol = np.copy(closest_histogram_savgol[i,:])
    work = np.copy(csavgol[4:-4])
    csavgol[4:-4] = signal.savgol_filter(work, 9, 2, mode='interp')
    closest_histogram_savgol[i,:] = csavgol[:] / np.sum(csavgol)

# ======================================================================
# ---- PART 2:  Save the closest-member histogram info to netcdf files
# ======================================================================

# ---- (2a) define netCDF file name for gamma distribution parameters and open
#      the file

outfile_nc = data_directory+cmodel+'/closest_histogram_'+\
    cmodel+'_date='+date_forecast+'_lead='+cleade+'.nc'
        
print 'writing netCDF closest histogram data to ',outfile_nc
rootgrp = Dataset(outfile_nc,'w',format='NETCDF4_CLASSIC')

# ---- (2b) declare array dimensions, indices for arrays

npcats = 3 
npcatvals = rootgrp.createDimension('npcatvals',npcats)
npcat_indices = rootgrp.createVariable('npcat_indices','i4',('npcatvals',))
npcat_indices.long_name = "indices for discretization of arrays by '+\
    'ensemble-mean precipitation forecast amounts"
npcat_indices.units = "n/a"

nmembervals = rootgrp.createDimension('nmembervals',nmembersx25)
nmember_indices = rootgrp.createVariable('nmember_indices','i4',('nmembervals',))
nmember_indices.long_name = "sorted ensemble member number"
nmember_indices.units = "n/a"

# ---- (2c) declare arrays that will hold the dressing data

precip_thresholds =  rootgrp.createVariable('precip_thresholds',\
    'f4',('npcatvals',), zlib=True,least_significant_digit=4)
precip_thresholds.units = "n/a"
precip_thresholds.long_name = \
    "Precipitation amounts that define borders between no, light, mod, heavy precip"
precip_thresholds.valid_range = [0.,100.]
precip_thresholds.missing_value = -99.99

closest_histogram = rootgrp.createVariable('closest_histogram',\
    'f4',('npcatvals','nmembervals',), zlib=True,least_significant_digit=6)
closest_histogram.units = "n/a"
closest_histogram.long_name = \
    "histogram of how often this sorted member is the closet to analyzed value"
closest_histogram.valid_range = [0.,1.]
closest_histogram.missing_value = -99.99

# --- (2d) write calculated values for netcdf file indices and variables

npcat_indices = range(npcats)
precip_thresholds[:] = np.array([np.float(thresh_light), \
    np.float(thresh_mod), np.float(thresh_high)])
nmember_indices[:] = range(nmembersx25)
closest_histogram[:,:] = closest_histogram_savgol[:,:]

# ---- (2e) more metadata

rootgrp.stream = "s4" # ????
rootgrp.title = "histograms of how often this sorted member is the closet to analyzed value for "+cmodel
rootgrp.Conventions = "CF-1.0"  # ????
rootgrp.history = "Created ~ Mar 2018 by Tom Hamill, tom.hamill@noaa.gov"
rootgrp.institution = "NOAA/ESRL/PSD"
rootgrp.platform = "Model"
rootgrp.references = "None"

# ---- (2f) close the file, and done

rootgrp.close()
sys.exit()   # uncomment if plots desired.

# ======================================================================
# ---- PART 3:  now make some diagnostic plots, save these to pdf file
# ======================================================================

fig = plt.figure(figsize=(6.,9.))
plt.suptitle('Closest-member histograms for lead = +'+\
    cleade+' h,\nmodel = '+cmodel,fontsize=16)
for i in range(3):
    if i == 0:
        ctitle = r'(a) 0.01 mm $\leq$ mean precip. < 2 mm '
        plotdata1 = closest_histogram_total[0,:]/2
        plotdata2 = closest_histogram_savgol[0,:]
        axlocn = [0.15,0.68,0.8,0.21]
    elif i == 2:
        ctitle = r'(b) 2 mm $\leq$ mean precip. < 6 mm '
        plotdata1 = closest_histogram_total[1,:]/2
        plotdata2 = closest_histogram_savgol[1,:]
        axlocn = [0.15,0.36,0.8,0.21]
    else:
        ctitle = r'(c) mean precip. $\geq$ 6 mm'
        plotdata1 = closest_histogram_total[2,:]/2
        plotdata2 = closest_histogram_savgol[2,:]
            axlocn = [0.15,0.06,0.8,0.21]

    a1 = fig.add_axes(axlocn)
    a1.set_title(ctitle,fontsize=14)
    a1.set_xlabel('Rank of sorted closest member to analyzed',fontsize=12)
    a1.set_ylabel('Fraction',fontsize=12)
    a1.set_xlim(0,nmembersx25+2)
    a1.set_ylim(0.0001,0.5)
    a1.set_yscale('log')
    a1.grid(color='Gray',lw=0.2,linestyle='--')
    a1.plot(range(1,nmembersx25+1), plotdata1,'-',\
        color='Red',label='Raw / 2',linewidth=2)
    a1.plot(range(1,nmembersx25+1), plotdata2,'-',\
        color='RoyalBlue',label='Smoothed',linewidth=2)
    ctext_low = "{:.3f}".format(plotdata2[0])
    ctext_high = "{:.3f}".format(plotdata2[-1])
    a1.text(nmembersx25*0.01,plotdata2[0],ctext_low,color='RoyalBlue')
    a1.text(nmembersx25*0.88,plotdata2[-1],ctext_high,color='RoyalBlue')
    if i == 0: a1.legend(loc=9)

plot_title = 'closest_histogram_'+cmodel+'_'+cyyyymmddhh+'_'+cleade+'h.pdf'
fig1.savefig(plot_title)
print 'saving plot to file = ',plot_title

pdf.savefig()
plt.close()
        










